load data

data('ToothGrowth') 
head(ToothGrowth) 
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5

Exploratory Data Analysis

summary(ToothGrowth)
##       len        supp         dose      
##  Min.   : 4.20   OJ:30   Min.   :0.500  
##  1st Qu.:13.07   VC:30   1st Qu.:0.500  
##  Median :19.25           Median :1.000  
##  Mean   :18.81           Mean   :1.167  
##  3rd Qu.:25.27           3rd Qu.:2.000  
##  Max.   :33.90           Max.   :2.000
nrow(ToothGrowth)
## [1] 60
ncol(ToothGrowth)
## [1] 3
names(ToothGrowth)
## [1] "len"  "supp" "dose"
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
ggpairs(ToothGrowth)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

2. Comparing tooth growth by supp and dose using statistical tests

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
par(mfrow=c(2,3))
plot_ly(data=ToothGrowth, x=~supp, y=~len, type="box")

Change ‘dose’ from numerical variable to factor variable

ToothGrowth$dose = factor(ToothGrowth$dose, levels=c(0.5,1.0,2.0),labels=c("low","med","high"))   
str(ToothGrowth)
## 'data.frame':    60 obs. of  3 variables:
##  $ len : num  4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
##  $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
##  $ dose: Factor w/ 3 levels "low","med","high": 1 1 1 1 1 1 1 1 1 1 ...

Replication

replications(len ~ supp * dose, data=ToothGrowth)
##      supp      dose supp:dose 
##        30        20        10
replications(len ~ supp * dose, data=ToothGrowth[1:58,])
## $supp
## supp
## OJ VC 
## 28 30 
## 
## $dose
## dose
##  low  med high 
##   20   20   18 
## 
## $`supp:dose`
##     dose
## supp low med high
##   OJ  10  10    8
##   VC  10  10   10
aggregate(ToothGrowth$len, by=list(ToothGrowth$supp,ToothGrowth$dose), FUN = mean)
##   Group.1 Group.2     x
## 1      OJ     low 13.23
## 2      VC     low  7.98
## 3      OJ     med 22.70
## 4      VC     med 16.77
## 5      OJ    high 26.06
## 6      VC    high 26.14
aggregate(ToothGrowth$len, by=list(ToothGrowth$supp,ToothGrowth$dose), FUN = sd)
##   Group.1 Group.2        x
## 1      OJ     low 4.459709
## 2      VC     low 2.746634
## 3      OJ     med 3.910953
## 4      VC     med 2.515309
## 5      OJ    high 2.655058
## 6      VC    high 4.797731
dose <- factor(ToothGrowth$dose)
fit <- aov(ToothGrowth$len ~ ToothGrowth$supp*ToothGrowth$dose)
summary(fit)
##                                   Df Sum Sq Mean Sq F value   Pr(>F)    
## ToothGrowth$supp                   1  205.4   205.4  15.572 0.000231 ***
## ToothGrowth$dose                   2 2426.4  1213.2  92.000  < 2e-16 ***
## ToothGrowth$supp:ToothGrowth$dose  2  108.3    54.2   4.107 0.021860 *  
## Residuals                         54  712.1    13.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dose <- factor(dose)
fit <- aov(ToothGrowth$len ~ ToothGrowth$supp*ToothGrowth$dose)
summary(fit)
##                                   Df Sum Sq Mean Sq F value   Pr(>F)    
## ToothGrowth$supp                   1  205.4   205.4  15.572 0.000231 ***
## ToothGrowth$dose                   2 2426.4  1213.2  92.000  < 2e-16 ***
## ToothGrowth$supp:ToothGrowth$dose  2  108.3    54.2   4.107 0.021860 *  
## Residuals                         54  712.1    13.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Graphical Explanation

Box Plot

 boxplot(len ~ supp * dose, data=ToothGrowth, ylab="Tooth Length", main="Boxplots of Tooth Growth Data", col= "skyblue")

Interaction Plot

with(ToothGrowth, interaction.plot(x.factor=dose, trace.factor=supp,response=len, fun=mean, type="b", legend=T,ylab="Tooth Length", main="Interaction Plot",pch=c(1,19)))

Conditional Plot

coplot(len ~ dose | supp, data=ToothGrowth, panel=panel.smooth,xlab="ToothGrowth data: length vs dose, given type of supplement", col= "red")

Using model.tables() function

aov.out = aov(len ~ supp * dose, data=ToothGrowth)
model.tables(aov.out, type="means", se=T)
## Tables of means
## Grand mean
##          
## 18.81333 
## 
##  supp 
## supp
##     OJ     VC 
## 20.663 16.963 
## 
##  dose 
## dose
##    low    med   high 
## 10.605 19.735 26.100 
## 
##  supp:dose 
##     dose
## supp low   med   high 
##   OJ 13.23 22.70 26.06
##   VC  7.98 16.77 26.14
## 
## Standard errors for differences of means
##           supp   dose supp:dose
##         0.9376 1.1484    1.6240
## replic.     30     20        10
summary(aov.out)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## supp         1  205.4   205.4  15.572 0.000231 ***
## dose         2 2426.4  1213.2  92.000  < 2e-16 ***
## supp:dose    2  108.3    54.2   4.107 0.021860 *  
## Residuals   54  712.1    13.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Treatment Contrasts

options("contrasts")
## $contrasts
##         unordered           ordered 
## "contr.treatment"      "contr.poly"
summary.lm(aov.out)
## 
## Call:
## aov(formula = len ~ supp * dose, data = ToothGrowth)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -8.20  -2.72  -0.27   2.65   8.27 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       13.230      1.148  11.521 3.60e-16 ***
## suppVC            -5.250      1.624  -3.233  0.00209 ** 
## dosemed            9.470      1.624   5.831 3.18e-07 ***
## dosehigh          12.830      1.624   7.900 1.43e-10 ***
## suppVC:dosemed    -0.680      2.297  -0.296  0.76831    
## suppVC:dosehigh    5.330      2.297   2.321  0.02411 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.631 on 54 degrees of freedom
## Multiple R-squared:  0.7937, Adjusted R-squared:  0.7746 
## F-statistic: 41.56 on 5 and 54 DF,  p-value: < 2.2e-16

Conclusion of the Hypothesis Testing

The mean of the OJ-low cell: We find this mean to be significantly different from zero, not an especially interesting result. The next line of the table, “suppVC”, shows the difference between the means of the VC-low cell and the OJ-low cell.

The standard error is for the difference between these means, so the difference is found to be significant. The “dosemed” line shows the difference between the means of the OJ-med cell and the OJ-low cell, and once again this difference is found to be statistically significant. The “dosehigh” line shows the difference in means between the OJ-high cell and the OJ-low cell, also significant. The last two lines test various elements of the interaction.

Appendix

cat("Student Name: Abhilash Dikshit","\n")
cat("Student Roll Number: 002702209")

data('ToothGrowth') 
head(ToothGrowth) 

summary(ToothGrowth)
nrow(ToothGrowth)
ncol(ToothGrowth)
names(ToothGrowth)

library(GGally)
ggpairs(ToothGrowth)

library(plotly)
par(mfrow=c(2,3))
plot_ly(data=ToothGrowth, x=~supp, y=~len, type="box")

ToothGrowth$dose = factor(ToothGrowth$dose, levels=c(0.5,1.0,2.0),labels=c("low","med","high"))   

str(ToothGrowth)

replications(len ~ supp * dose, data=ToothGrowth)

replications(len ~ supp * dose, data=ToothGrowth[1:58,])

aggregate(ToothGrowth$len, by=list(ToothGrowth$supp,ToothGrowth$dose), FUN = mean)
aggregate(ToothGrowth$len, by=list(ToothGrowth$supp,ToothGrowth$dose), FUN = sd)

dose <- factor(ToothGrowth$dose)
fit <- aov(ToothGrowth$len ~ ToothGrowth$supp*ToothGrowth$dose)
summary(fit)

dose <- factor(dose)
fit <- aov(ToothGrowth$len ~ ToothGrowth$supp*ToothGrowth$dose)
summary(fit)

 boxplot(len ~ supp * dose, data=ToothGrowth, ylab="Tooth Length", main="Boxplots of Tooth Growth Data", col= "skyblue")

with(ToothGrowth, interaction.plot(x.factor=dose, trace.factor=supp,response=len, fun=mean, type="b", legend=T,ylab="Tooth Length", main="Interaction Plot",pch=c(1,19)))

coplot(len ~ dose | supp, data=ToothGrowth, panel=panel.smooth,xlab="ToothGrowth data: length vs dose, given type of supplement", col= "red")

aov.out = aov(len ~ supp * dose, data=ToothGrowth)

model.tables(aov.out, type="means", se=T)

summary(aov.out)

options("contrasts")

summary.lm(aov.out)

## NA